###############################################################################   
#### Replication Materials                                                 #### 
#### Taegyoon Kim, 2022. Violent Political Rhetoric on Twitter.            ####
#### Political Science Research and Methods                                ####
###############################################################################  


###############################################################################
################################### Set Up ####################################
###############################################################################

# packages -------------------------

# Attach every package the script relies on, in a fixed order (attach order
# determines which package wins when two export the same function name).
# library() is used rather than require(): require() merely returns FALSE
# (with a warning) when a package is missing, so the script would carry on and
# fail later at the first use of a missing function; library() stops at once
# with a clear error. invisible() keeps the list returned by lapply() from
# being printed.
invisible(lapply(c('readr', 'gridExtra', 'xtable', 'hexbin', 'ggplot2', 'dplyr',
                   'ggthemes', 'tidyr', 'magrittr', 'scales'),
                 library,
                 character.only = TRUE))


###############################################################################
######################## Generate Figure A1 & Table A1 ########################
###############################################################################

# load count and handle data -------------------------

# Root folders for generated output (figures/tables) and for the input data.
# Both strings end in '/' because file names are appended with paste0().
path_output <- '/kim_psrm_replication/output/'
path_data <- '/kim_psrm_replication/data/'

# Read the proportion data set (df_prop.csv) from the data folder.
df_prop <- paste0(path_data, 'df_prop.csv') %>%
  read_csv()

# figure a1 -------------------------

# Aggregate the proportion data to one row per politician (Twitter handle).
# - party_bi / gender / office / name: taken from the first row of each handle
#   (presumably constant within a handle -- verify against the data).
# - tweet_count_*_sum: totals of the two tweet counts over all rows of a handle.
# - prop: name-count total divided by handle-count total (per the figure's axis
#   label: full-name tweets / mention tweets). 0.001 is added to numerator and
#   denominator so accounts with zero counts still give a finite, positive
#   ratio that can be shown on a log axis.
# The grouping column `handle` is carried through by summarise() automatically,
# so it is not re-derived inside the call.
df_prop_agg <- df_prop %>%
  group_by(handle) %>%
  summarise(
    party_bi = first(party_bi),
    gender = first(gender),
    office = first(office),
    name = first(name),
    tweet_count_handle_sum = sum(tweet_count_handle),
    tweet_count_name_sum = sum(tweet_count_name)
  ) %>%
  mutate(prop = (tweet_count_name_sum + 0.001) / (tweet_count_handle_sum + 0.001)) # dealing with zeros

# Figure A1: histogram of the per-account proportion on a log-scaled x axis,
# with a dashed red reference line at the median and a matching text label.

# Median is computed once so that the reference line and the label text are
# always derived from the same number.
prop_median <- median(df_prop_agg$prop)

fig_a1 <- ggplot(df_prop_agg, aes(x = prop)) + 
  geom_histogram(color = 'black', 
                 fill = 'white', 
                 bins = 15) + 
  # log-transformed axis; tick labels are the raw proportion multiplied by 100
  scale_x_continuous(n.breaks = 10, 
                     trans = log_trans(), 
                     labels = number_format(accuracy = 0.01, scale = 100)) + 
  labs(x = '\nProportion (number of full name tweets / number of mention tweets)',
       y = 'Number of Accounts') + 
  # a single line: xintercept is passed as a value, not mapped through aes(),
  # so it is not recycled to one identical line per data row
  geom_vline(xintercept = prop_median, 
             colour = 'red', 
             linetype = 'longdash') + 
  # label position (x, y) is fixed in data coordinates; the text itself is
  # formatted from the computed median as a percentage with two decimals
  ggplot2::annotate('text', 
           x = 0.55, 
           y = 130, 
           label = sprintf('Median: %.2f%%', prop_median * 100), 
           colour = 'red', 
           size = 6) +
  theme_classic() + 
  theme(text = element_text(size = 17)) 

# Top-level reference so the figure is still displayed wherever the original
# unassigned ggplot expression would have been auto-printed.
fig_a1

# Save the figure explicitly instead of relying on the last plot created.
ggsave(paste0(path_output, 'figa1.pdf'),
       plot = fig_a1,
       dpi = 600,
       width = 10,
       height = 7,
       units = 'in')

# table a1 -------------------------

# Table A1: median proportion (as a percentage, one decimal) for each subgroup
# of accounts and for all accounts together.

# One logical row selector per table row, in the order the rows appear.
# %in% is used for the comparisons so that rows with a missing value in the
# grouping column are simply left out of that subgroup.
subgroup_rows <- list(
  'Women'           = df_prop_agg$gender %in% 'F',
  'Men'             = df_prop_agg$gender %in% 'M',
  'Republican'      = df_prop_agg$party_bi %in% 'R',
  'Non-Republican'  = df_prop_agg$party_bi %in% 'Non-R',
  'Governors'       = df_prop_agg$office %in% 'governor',
  'Senators'        = df_prop_agg$office %in% 'senator',
  'Representatives' = df_prop_agg$office %in% 'representative',
  'Total'           = rep(TRUE, nrow(df_prop_agg))
)

# median() has no rounding argument, so rounding is applied once, uniformly,
# with round() after converting the median to a percentage.
# unname() keeps the default 1..n row names of the data frame.
prop_compare <- data.frame(
  Type = names(subgroup_rows),
  Proportion = unname(vapply(
    subgroup_rows,
    function(keep) round(median(df_prop_agg$prop[keep]) * 100, 1),
    numeric(1)
  ))
)

# row.names is left at its default, so the CSV keeps its leading index column.
write.csv(prop_compare, paste0(path_output, 'tbla1.csv'))